import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
matplotlib.use('TkAgg')  # Select TkAgg as the matplotlib backend
# Build a synthetic customer sample: two features (age, income) and a
# binary purchase label. The seed is fixed so every run draws the same data.
np.random.seed(0)
age = np.random.randint(low=18, high=70, size=200)
income = np.random.randint(low=2000, high=20000, size=200)
X = np.stack([age, income], axis=1)  # one row per customer: [age, income]

# Purchase label (non-linear rule): 1 when the customer is older than 40
# OR earns more than 10000, otherwise 0.
y = np.logical_or(age > 40, income > 10000).astype(int)

# Split into training and test sets (80% / 20%); random_state is fixed so
# the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the classifier.
# Age and income live on very different scales (tens vs. thousands), which
# makes the default L2 penalty act unevenly on the two coefficients and can
# keep the solver from converging within its default iteration budget.
# Standardizing inside a pipeline fixes both: the scaler is fitted on the
# training rows only, and model.predict() applies it automatically, so the
# rest of the script keeps passing raw [age, income] rows.
model = make_pipeline(StandardScaler(), LogisticRegression())

# Fit scaler + logistic regression on the training set
model.fit(X_train, y_train)

# Predict labels for the held-out test set
y_pred = model.predict(X_test)

# Report accuracy on the test set
accuracy = accuracy_score(y_test, y_pred)
print(f"准确率: {accuracy:.2f}")

# Report the confusion matrix (rows: true class, columns: predicted class)
cm = confusion_matrix(y_test, y_pred)
print("混淆矩阵:")
print(cm)

# Visualize the test-set points together with the model's decision regions.
# NOTE(review): labels/title/legend below are Chinese; the default matplotlib
# font may lack CJK glyphs -- confirm a CJK-capable font is configured.
plt.figure(figsize=(10, 6))
plt.scatter(X_test[y_test == 1][:, 0], X_test[y_test == 1][:, 1], c='blue', marker='o', label='购买')
plt.scatter(X_test[y_test == 0][:, 0], X_test[y_test == 0][:, 1], c='red', marker='x', label='未购买')

# Decision regions: evaluate the model on a regular grid covering the data
# range (padded by 1 on each side).
# The grid uses a fixed number of samples per axis instead of a fixed step:
# the income axis spans roughly 18000 units, so a 0.1 step would create
# ~180k x ~530 (about 10^8) grid points and several GB of arrays.
grid_points = 300
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(
    np.linspace(x_min, x_max, grid_points),
    np.linspace(y_min, y_max, grid_points),
)

# Predict a class for every grid point, then restore the grid shape so the
# predictions can be drawn as filled contours.
Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
plt.contourf(xx, yy, Z, alpha=0.3, cmap=plt.cm.coolwarm)

plt.xlabel('年龄')
plt.ylabel('收入')
plt.title('逻辑回归预测客户购买行为')
plt.legend()
plt.show()